home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-04
/
bipl.zip
/
PROGS.ZIP
/
PARGEN.ICN
< prev
next >
Wrap
Text File
|
1992-11-26
|
6KB
|
201 lines
############################################################################
#
# File: pargen.icn
#
# Subject: Program to generate context-free parser
#
# Author: Ralph E. Griswold
#
# Date: March 31, 1992
#
###########################################################################
#
# This program reads a context-free BNF grammar and produces an Icon
# program that is a parser for the corresponding language.
#
# Nonterminal symbols are are enclosed in angular brackets. Vertical
# bars separate alternatives. All other characters are considered to
# be terminal symbols. The nonterminal symbol on the first line is
# taken to be the goal.
#
# An example is:
#
# <expression>::=<term>|<term>+<expression>
# <term>::=<element>|<element>*<term>
# <element>::=x|y|z|{<expression>}
#
# Parentheses can be used for grouping symbols, as in
#
# <term>::=<element>(|*<term>)
#
# Note that an empty alternative is allowable.
#
# The right-hand side metacharacters <, >, (, ), and | are accessible
# through the built-in symbols <lb>, <rb>, <lp>, <rp>, and <vb>,
# respectively. There are two other build-in symbols, <empty> and <nl>
# that match the empty string and a newline, respectively.
#
# Characters in nonterminal names are limited to letters, digits, and
# underscores.
#
# An underscore is appended to the parsing procedure name to avoid
# possible collisions with Icon function names.
#
# Lines beginning with an = are passed through unchanged. This allows
# Icon declarations to be placed in the parser. Lines beginning with
# a # are considered to be comments and are ignored.
#
# If the name of a ucode file is given on the command line, a link
# declaration for it is provided in the output. Otherwise the main
# procedure in recog is used.
#
############################################################################
#
# Limitations:
#
# Left recursion in the grammar may cause the parser to loop.
# There is no check that all nonterminal symbols that are referenced
# are defined or that there may be duplicate definitions.
#
############################################################################
#
# Reference:
#
# The Icon Programming Language, Second Edition, Ralph E. and Madge T.
# Griswold, Prentice-Hall, 1990, pp. 180-187.
#
############################################################################
#
# Output links recog, matchlib
#
# See also: recog.icn, matchlib.icn, and parscond.icn
#
############################################################################
global declend # name suffix and record body
global goal # nonterminal goal name
global nchars # characters allowed in a nonterminal name
global procend # name suffix and parens
global sym # current nonterminal symbol
procedure main(args)
local line # a line of input
declend := "__"
procend := "_()"
nchars := &letters ++ &digits ++ '_'
while line := read() do { # process lines of input
line ? {
case move(1) of { # action depends on first character
"<": tab(0) ? transprod() # transform the production
"=": write(tab(0)) # pass through
"#": &null # ignore
default: error()
} # end case
} # end scan
} # end while
write("link ",args[1] | "recog") # link main procedure
write("link matchlib") # link built-in symbols
write("global goal\n") # write out global declaration
write("procedure init()") # write out initialization procedure
write(" goal := ",goal,"_")
write(" return")
write("end")
end
#
# Transform a production.
#
procedure transprod()
{
sym := tab(many(nchars)) & # get the nonterminal name
=">::="
} | error() # catch syntactic error
write("record ",sym,declend,"(alts)")# record declaration
write("procedure ",sym,procend) # procedure header
write(" suspend {") # begin the suspend expression
writes(" ",sym,declend,"(") # write indentation
transalts() # transform the alternatives
write(")")
write(" }") # end the suspend expression
write("end") # end the procedure declaration
write() # space between declarations
/goal := sym # first symbol is goal
end
#
# Transform a sequence of alternatives.
#
procedure transalts()
local alt # an alternative
while alt := tab(bal('|') | 0) do { # process alternatives
writes("[") # record for alternative
alt ? transseq() # transform the symbols
if move(1) then writes("] | ") # if more, close the parentheses
# and add the alternation.
else {
writes("]") # no more, so just close the parentheses
break
} # end else
} # end while
end
#
# Transform a sequence of symbols.
#
procedure transseq()
repeat {
transsym() # process a symbols
if not pos(0) then writes(" , ") # if there's more, provide concatenation
else break # else get out and return
} # end while
return
end
#
# Transform a symbol.
#
procedure transsym()
local group
if ="<" then { # if it's a nonterminal
{ # write it with suffix.
writes(tab(many(nchars)),procend) &
=">" # get rid of closing bracket
} | error() # or catch the error
} # end then
else if ="(" then { # if it's a parenthesis, pass it
writes("(") # along and call transseq()
group := tab(bal(')')) | error()
group ? transalts()
writes(")")
move(1)
}
# else transform nonterminal string
else writes("=",image(tab(upto('<') | 0)))
return
end
#
# Issue error message and terminate execution.
#
procedure error()
stop("*** malformed definition: ",tab(0))
end